Overview

This project investigates Symbiodinium communities in scleractinian corals sampled at various sites on the north and south shores of St. John, US Virgin Islands in August 2012. The ITS2 region of nrDNA isolated from coral samples was amplified and sequenced by paired-end 300bp reads on an Illumina MiSeq platform. Sequence data is processed through a custom bioinformatic pipeline and resulting OTU tables are analyzed by various multivariate statistical approaches to ask questions regarding alpha and beta diversity of Symbiodinium communities within and across coral species. The major aims of this work are to 1) characterize Symbiodinium communities in a range of coral species using high-throughput sequencing (many for the first time), and 2) to forward novel bioinformatic and statistical approaches to better understand Symbiodinium ecology.

Bioinformatics

A custom pipeline is used here to process ITS2 sequence data. Briefly, paired reads were merged using the illumina-utils and filtered to keep only those with 3 mismatches or less. Chimeras were then removed by usearch. From here, sequence data was processed using both of two approaches:

  1. 97% OTU clustering within samples. With this approach, sequences from each individual sample were clustered at 97% similarity and the most abundant sequence from each cluster was selected as the representative sequence and assigned taxonomy by BLAST using a custom ITS2 reference database. Singletons were excluded. Identical OTUs across samples were then merged to create a global OTU table. By clustering at 97% within samples, as opposed to across the entire dataset, OTUs from different samples that have different representative sequences (i.e., different most abundant sequence within the 97% cluster) will remain separate even if these sequences are >97% similar to each other. This approach is used because Symbiodinium ITS2 sequences are know to be multi-copy and vary intragenomically, and distinct Symbiodinium types may contain identical sets of ITS2 sequences that occur in different relative proportions within the genome; therefore, selecting the most abundant sequence variant within a sample prevents a potentially distinct Symbiodinium type from being collapsed into an OTU with a different representative sequence that happens to be more abundant in other samples within the dataset.
  2. 100% OTU clustering. All ITS2 sequences were clustered at 100% identity and assigned taxonomy by BLAST to the custom reference database. Singletons were excluded.

OTU tables and taxonomic assignments from both of these bioinformatic approaches are used in comparative downstream analyses.

Data pre-processing

Import dataset

The phyloseq package is used here to combine the OTU table, taxonomic assignments, and sample metadata into a single R data object to facilitate downstream analyses.

# Import OTU tables
OTU97 <- otu_table(read.table("data/97_otus.tsv", header=T, check.names=F, row.names=1,
                              skip=1, sep="\t", comment.char=""), taxa_are_rows=T)
OTU97bs <- otu_table(read.table("data/97_otus_bysample.tsv", header=T, check.names=F, row.names=1,
                              skip=1, sep="\t", comment.char=""), taxa_are_rows=T)
OTU100 <- otu_table(read.table("data/100_otus.tsv", header=T, check.names=F, row.names=1,
                               skip=1, sep="\t", comment.char=""), taxa_are_rows=T)
# Import taxonomy
TAX97 <- tax_table(as.matrix(read.table("data/97_otus_tax_assignments.txt", sep="\t", row.names=2,
                                        col.names=c("Clade", "OTU.ID", "Taxonomy", "Eval", "Subtype"))))
TAX97bs <- tax_table(as.matrix(read.table("data/97_otus_bysample_tax_assignments.txt", sep="\t", row.names=2,
                                        col.names=c("Clade", "OTU.ID", "Taxonomy", "Eval", "Subtype"))))
TAX100 <- tax_table(as.matrix(read.table("data/100_otus_tax_assignments.txt", sep="\t", row.names=2,
                                         col.names=c("Clade", "OTU.ID", "Taxonomy", "Eval", "Subtype"))))
# Import sample data
SAM <- sample_data(read.delim("data/mapping_file.txt", sep="\t", header=T, row.names=1))
# Build phyloseq objects
phy97 <- phyloseq(OTU97, TAX97, SAM)
phy97bs <- phyloseq(OTU97bs, TAX97bs, SAM)
phy100 <- phyloseq(OTU100, TAX100, SAM)

Descriptive statistics and distribution of OTU counts

# Compute summary statistics
stats <- function(phy) {
  return(
    data.frame(
      'Total count in OTU table'=sum(otu_table(phy)),
      'Number of OTUs'=ntaxa(phy),
      'Range of OTU counts'=paste0(range(taxa_sums(phy)), collapse=" - "),
      'Number of singleton OTUs'=length(taxa_sums(phy)[taxa_sums(phy) <= 1]),
      'Number of samples'=nsamples(phy),
      'Range of reads per sample'=paste0(range(sample_sums(phy)), collapse=" - "),
      'Arithmetic mean (±SD) reads per sample'=paste(as.integer(mean(sample_sums(phy))), 
                                                     as.integer(sd(sample_sums(phy))), sep=" ± "),
      'Geometric mean (±SD) reads per sample'=paste(as.integer(exp(mean(log(sample_sums(phy))))),
                                                    as.integer(exp(sd(log(sample_sums(phy))))), sep=" ± "),
      check.names=F)
  )
}
stats97 <- data.frame(`97% OTUs`=t(stats(phy97)), check.names=F)
stats97bs <- data.frame(`97% within-sample OTUs`=t(stats(phy97bs)), check.names=F)
stats100 <- data.frame(`100% OTUs`=t(stats(phy100)), check.names=F)

# Create and plot histograms
taxhist97 <- hist(log10(taxa_sums(phy97)), plot=F)
samhist97 <- hist(log10(sample_sums(phy97)), plot=F)

taxhist97bs <- hist(log10(taxa_sums(phy97bs)), plot=F)
samhist97bs <- hist(log10(sample_sums(phy97bs)), plot=F)

taxhist100 <- hist(log10(taxa_sums(phy100)), plot=F)
samhist100 <- hist(log10(sample_sums(phy100)), plot=F)

par(mfrow=c(2, 3), mar=c(3,3,1,1))
plot(taxhist97, col="black", main="97% OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
     xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist97bs, col="black", main="97% within-sample OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
     xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist100, col="black", main="100% OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
     xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(samhist97, col="black", main="97% OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
     xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist97bs, col="black", main="97% within-sample OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
     xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist100, col="black", main="100% OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
     xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)

# Create stats table
knitr::kable(cbind(stats97, stats97bs, stats100))
97% OTUs 97% within-sample OTUs 100% OTUs
Total count in OTU table 1492736 1492692 1056483
Number of OTUs 135 174 41449
Range of OTU counts 2 - 739709 2 - 474177 2 - 171487
Number of singleton OTUs 0 0 0
Number of samples 84 84 84
Range of reads per sample 3 - 169977 3 - 169975 1 - 112823
Arithmetic mean (±SD) reads per sample 17770 ± 20982 17770 ± 20982 12577 ± 14480
Geometric mean (±SD) reads per sample 9427 ± 5 9400 ± 5 6451 ± 6

Filter dataset

Filter samples by minimum count

# Set threshold number of reads
sn <- 0
# Remove samples with fewer reads than threshold
phy97.f <- prune_samples(sample_sums(phy97)>=sn, phy97)
phy97bs.f <- prune_samples(sample_sums(phy97bs)>=sn, phy97bs)
phy100.f <- prune_samples(sample_sums(phy100)>=sn, phy100)

Filter OTUs by minimum count

# Set threshold count
n <- 10
# Identify OTUs below threshold count
taxa97 <- taxa_sums(phy97.f)[which(taxa_sums(phy97.f) >= n)]
taxa97bs <- taxa_sums(phy97bs.f)[which(taxa_sums(phy97bs.f) >= n)]  
taxa100 <- taxa_sums(phy100.f)[which(taxa_sums(phy100.f) >= n)]
# Remove taxa below threshold count
phy97.f <- prune_taxa(names(taxa97), phy97.f)
phy97bs.f <- prune_taxa(names(taxa97bs), phy97bs.f)
phy100.f <- prune_taxa(names(taxa100), phy100.f)

# Refilter samples in case removal of OTUs caused any samples to drop below threshold
sn <- 200
phy97.f <- prune_samples(sample_sums(phy97.f)>=sn, phy97.f)
phy97bs.f <- prune_samples(sample_sums(phy97bs.f)>=sn, phy97bs.f)
phy100.f <- prune_samples(sample_sums(phy100.f)>=sn, phy100.f)

Filtered data: descriptive statistics and distribution of OTUs

# Compute summary statistics
stats97.f <- data.frame(`97% OTUs`=t(stats(phy97.f)), check.names=F)
stats97bs.f <- data.frame(`97% within-sample OTUs`=t(stats(phy97bs.f)), check.names=F)
stats100.f <- data.frame(`100% OTUs`=t(stats(phy100.f)), check.names=F)

# Create and plot histograms
taxhist97 <- hist(log10(taxa_sums(phy97.f)), plot=F)
taxhist97bs <- hist(log10(taxa_sums(phy97bs.f)), plot=F)
taxhist100 <- hist(log10(taxa_sums(phy100.f)), plot=F)
samhist97 <- hist(log10(sample_sums(phy97.f)), plot=F)
samhist97bs <- hist(log10(sample_sums(phy97bs.f)), plot=F)
samhist100 <- hist(log10(sample_sums(phy100.f)), plot=F)

par(mfrow=c(2, 3), mar=c(3,3,1,1))
plot(taxhist97, col="black", main="97% OTU counts", xlim=c(0, 5), las=1, mgp=c(2,0.5,0),
     xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist97bs, col="black", main="97% OTU counts", xlim=c(0, 5), las=1, mgp=c(2,0.5,0),
     xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist100, col="black", main="100% OTU counts", xlim=c(0, 5), las=1, mgp=c(2,0.5,0),
     xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(samhist97, col="black", main="97% OTU reads per sample", xlim=c(0, 5), las=1, mgp=c(2,0.5,0),
     xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist97bs, col="black", main="97% OTU reads per sample", xlim=c(0, 5), las=1, mgp=c(2,0.5,0),
     xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist100, col="black", main="100% OTU reads per sample", xlim=c(0, 5), las=1, mgp=c(2,0.5,0),
     xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)

# Create stats table
knitr::kable(cbind(stats97.f, stats97bs.f, stats100.f))
97% OTUs 97% within-sample OTUs 100% OTUs
Total count in OTU table 1492454 1492337 944975
Number of OTUs 102 117 4727
Range of OTU counts 10 - 739699 0 - 474176 10 - 171442
Number of singleton OTUs 0 1 0
Number of samples 80 80 80
Range of reads per sample 706 - 169958 707 - 169964 485 - 97036
Arithmetic mean (±SD) reads per sample 18655 ± 21113 18654 ± 21114 11812 ± 12750
Geometric mean (±SD) reads per sample 13165 ± 2 13161 ± 2 8152 ± 2

Transform count data

# Convert to proportion (relative abundance)
phy97.f.p <- transform_sample_counts(phy97.f, function(x) x/sum(x))
phy97bs.f.p <- transform_sample_counts(phy97bs.f, function(x) x/sum(x))
phy100.f.p <- transform_sample_counts(phy100.f, function(x) x/sum(x))
# Apply transformation function
transform <- function(x) sqrt(x/sum(x))  # Set transformation function
phy97.f.t <- transform_sample_counts(phy97.f, transform)  # Transform data
phy97bs.f.t <- transform_sample_counts(phy97bs.f, transform) 
phy100.f.t <- transform_sample_counts(phy100.f, transform)

Data analysis

Community composition barplots

Here the composition of each sample is plotted as a horizontal bar, sorted by species and shore. Each segment of the bar represents an individual OTU. In the lefthand plot, all OTUs are colored by clade, in the right OTU, all OTUs are given a unique color.

# Define function to plot Symbiodinium community composition
composition <- function(phy, col, legend=T) {
  samdat <- data.frame(sample_data(phy))
  samdat$Genus <- factor(samdat$Genus, levels=rev(levels(samdat$Genus)))
  samdat$Species <- factor(samdat$Species, levels=rev(levels(samdat$Species)))
  samdat$Shore <- factor(samdat$Shore, levels=rev(levels(samdat$Shore)))
  samdat <- samdat[with(samdat, order(Genus, Species, Shore)), ]
  typerelabund <- as.matrix(otu_table(phy)[order(data.frame(tax_table(phy))$Subtype), 
                                           rownames(samdat)])
  shorebreaks <- c(as.character(samdat$Shore), "X")==c("X", as.character(samdat$Shore))
  shorebreaks <- which(shorebreaks==F) - 1
  spbreaks <- c(which(duplicated(samdat$Species)==F) - 1, nrow(samdat))
  # Make Barplot
  barplot(typerelabund, horiz=T, space=0, axes=F,axisnames=F, yaxs="i", col=col)
  rect(0, 0, par("usr")[2], par("usr")[4], lwd=1, xpd=T)
  axis(side=1, at=seq(0, 1, 0.1), line=0, tck=-0.025, mgp=c(0,0.25,0), cex.axis=0.7)
  mtext(side=1, "Relative abundance", cex=0.7, line=1)
  # Add legend
  if (legend==T) {
    legend(x=par("usr")[2]/2, y=par("usr")[4], xjust=0.5, yjust=0.25, horiz=T, bty="n", xpd=T, 
           cex=0.7, legend=c("A", "B", "C", "D", "F", "G"), fill=taxcolors, x.intersp=0.5)
    legend(x=par("usr")[2]-0.03, y=par("usr")[4], xjust=0, yjust=0.1, bty="n", xpd=T, cex=0.6, 
           pt.cex=0.6, legend=c("N Shore", "S Shore"), fill=c(0, "gray40"), y.intersp=0.7, 
           x.intersp=0.3)
  }
  # Add grouping bars for Shore
  for (i in 1:length(shorebreaks)) {
    lines(c(0, 1), c(shorebreaks[i], shorebreaks[i]), lty=2, lwd=0.25)
    rect(1.01, shorebreaks[i], 1.04, shorebreaks[i+1], col=rep(c("gray40", 0), 50)[i], 
         lwd=0.25, xpd=T)
  }
  # Add lines to separate species and species names
  for (i in 1:length(spbreaks)) {
    lines(c(0, 1.07), c(spbreaks[i], spbreaks[i]), xpd=T, type="l", lwd=0.4)
    text(1.03, (spbreaks[i] + spbreaks[i+1]) / 2, xpd=T, pos=4, cex=0.6,
        labels=paste(samdat$Genus[which(duplicated(samdat$Species)==F)][i], "\n",
                    samdat$Species[which(duplicated(samdat$Species)==F)][i], sep=""))
  }
}

par(mfrow=c(1,2), mar=c(2, 1.5, 2, 5), lwd=0.1, cex=0.7)

# Plot composition of 97% OTUs colored by clade
composition(phy97.f.p, col=taxcolors[data.frame(tax_table(phy97.f.p))[order(data.frame(tax_table(phy97.f.p))$Subtype), ]$Clade], legend=T)

# Plot composition of 97% OTUs colored by OTU
composition(phy97.f.p, col=rainbow(ntaxa(phy97.f.p)), legend=F)

# Plot composition of 97% within-sample OTUs colored by clade
composition(phy97bs.f.p, col=taxcolors[factor(data.frame(tax_table(phy97bs.f.p))[order(data.frame(tax_table(phy97bs.f.p))$Subtype), ]$Clade, levels=c("A","B","C","D","F","G"))], legend=T)

# Plot composition of 97% within-sample OTUs colored by OTU
composition(phy97bs.f.p, col=rainbow(ntaxa(phy97bs.f.p)), legend=F)

par(mfrow=c(1,2), mar=c(2, 1.5, 2, 5), lwd=0.1, cex=0.7)
# Plot composition of 100% OTUs colored by clade
composition(phy100.f.p, col=taxcolors[data.frame(tax_table(phy100.f.p))[order(data.frame(tax_table(phy100.f.p))$Subtype), ]$Clade], legend=T)

# Plot composition of 100% OTUs colored by OTU
composition(phy100.f.p, col=rainbow(ntaxa(phy100.f.p)), legend=F)

Differences between species

Whether Symbiodinium communities differ among species is evaluated using permutational analysis of variance (PERMANOVA).

# PERMANOVA for difference among species
sppdiffs <- function(phy) {
  permanova.spp.t <- adonis(phyloseq::distance(phy, "bray") ~ Species, 
                          data=as(sample_data(phy), "data.frame"), permutations=9999)
  permanova.spp.t
  # Pairwise PERMANOVA tests for differences between species
  dmat <- as(phyloseq::distance(phy, "bray"), "matrix")
  df <- as(sample_data(phy), "data.frame")
  pairs <- data.frame(t(combn(levels(df$Species), 2)), stringsAsFactors=F)
  p.pairs <- setNames(rep(NA, nrow(pairs)), with(pairs, paste(X1, X2, sep='-')))
  for (i in 1:nrow(pairs)) {
    dfs <- subset(df, Species %in% c(pairs[i,1], pairs[i,2]))
    dmats <- dmat[rownames(dfs), rownames(dfs)]
    set.seed(152470)
    permanova <- adonis(dmats ~ Species, data=dfs, permutations=999)
    p.pairs[i] <- permanova$aov.tab$`Pr(>F)`[1]
  }
  # Return letters signifying differences 
  return(multcompLetters(p.pairs))
}

sppdiffs97 <- sppdiffs(phy97.f.t)
sppdiffs97bs <- sppdiffs(phy97bs.f.t)
sppdiffs100 <- sppdiffs(phy100.f.t)

sppdiffs <- data.frame("97% OTUs"=sppdiffs97$Letters, "97% within-sample OTUs"=sppdiffs97bs$Letters,
                       "100% OTUs"=sppdiffs100$Letters, check.names=F)
knitr::kable(sppdiffs, caption="Species not sharing a letter are significantly different (p < 0.05)")
Species not sharing a letter are significantly different (p < 0.05)
97% OTUs 97% within-sample OTUs 100% OTUs
alcicornis a ab a
annularis bc ab b
astreoides d c c
cavernosa e d d
cylindrus f a e
fragum a ab f
furcata b e g
radians g f h
siderea g d d
strigosa ac b b

Differences between shores

Whether Symbiodinium communities differ between north vs. south shores within species is tested here using PERMANOVA.

shorediffs <- function(phy) {
  # Compute within- and between-shore statistics for each Species
  shorestats <- data.frame(matrix(dimnames=list(levels(data.frame(sample_data(phy))$Species), 
                                                c("Species", "n", "overall", "within", 
                                                  "between", "R2", "bd", "p")), ncol=8, nrow=10))
  for (i in 1:nlevels(data.frame(sample_data(phy))$Species)) {
    sp <<- levels(data.frame(sample_data(phy))$Species)[i]
    shorestats$Species[i] <- sp
    phy.species <- subset_samples(phy, Species==sp)  # Set temp phyloseq object
    shorestats$n[i] <- nsamples(phy.species)
    md <- meandist(phyloseq::distance(phy.species, "bray"), 
                   sample_data(phy.species)$Shore)
    shorestats$within[i] <- summary(md)$W  # Weighted mean dissimilarity within both Pools
    shorestats$between[i] <- summary(md)$B  # Mean dissimilarity between Pools
    shorestats$overall[i] <- summary(md)$D  # Overall dissimilarity
    if (nlevels(as(sample_data(phy.species), "data.frame")$Shore) > 1) {
      #set.seed(70235)
      set.seed(43789)
      permanova <- adonis(phyloseq::distance(phy.species, "bray") ~ Shore, 
                          data=as(sample_data(phy.species), "data.frame"), permutations=999)
      shorestats$R2[i] <- permanova$aov.tab$"R2"[1]  # PERMANOVA partial R-squared
      shorestats$p[i] <- permanova$aov.tab$"Pr(>F)"[1]  # PERMANOVA p-value
      bd <- betadisper(phyloseq::distance(phy.species, "bray"),
                       group=as(sample_data(phy.species), "data.frame")$Shore)
      shorestats$bd[i] <- TukeyHSD(bd)$group[4]
    } else {
      shorestats$R2[i] <- NA
      shorestats$p[i] <- NA
      shorestats$bd[i] <- NA
    }
  }
  return(shorestats)
}

shorestats97 <- shorediffs(phy97.f.t)
shorestats97bs <- shorediffs(phy97bs.f.t)
shorediffs(phy97bs.f.p)
##               Species  n overall  within between    R2   bd     p
## alcicornis alcicornis 10 0.60398 0.65378 0.56414 0.037 1.00 0.798
## annularis   annularis  4 0.63204 0.63204     NaN    NA   NA    NA
## astreoides astreoides  5 0.00433 0.00433     NaN    NA   NA    NA
## cavernosa   cavernosa  7 0.00068 0.00073 0.00065 0.183 0.36 0.426
## cylindrus   cylindrus  8 0.02571 0.02696 0.02463 0.096 0.87 0.848
## fragum         fragum  8 0.25144 0.30904 0.20152 0.086 0.48 0.721
## furcata       furcata  9 0.70295 0.55499 0.82132 0.345 0.82 0.040
## radians       radians 10 0.02971 0.03101 0.02867 0.064 0.90 0.944
## siderea       siderea 10 0.51237 0.54145 0.48910 0.061 0.54 0.484
## strigosa     strigosa  9 0.78854 0.71022 0.85119 0.242 0.21 0.075
shorestats100 <- shorediffs(phy100.f.t)

knitr::kable(shorestats97, row.names=F, 
             caption="97% OTU data: differences within and between shore for each species")
97% OTU data: differences within and between shore for each species
Species n overall within between R2 bd p
alcicornis 10 0.13 0.13 0.13 0.11 0.17 0.44
annularis 4 0.59 0.59 NaN NA NA NA
astreoides 5 0.07 0.07 NaN NA NA NA
cavernosa 7 0.13 0.12 0.13 0.26 0.24 0.07
cylindrus 8 0.13 0.10 0.14 0.27 0.20 0.03
fragum 8 0.22 0.26 0.18 0.08 0.40 0.85
furcata 9 0.54 0.37 0.67 0.53 0.20 0.07
radians 10 0.13 0.14 0.13 0.06 0.96 0.84
siderea 10 0.30 0.28 0.32 0.21 0.19 0.02
strigosa 9 0.42 0.41 0.43 0.21 0.26 0.34
knitr::kable(shorestats97bs, row.names=F, 
             caption="97% within-sample OTU data: differences within and between shore for each species")
97% within-sample OTU data: differences within and between shore for each species
Species n overall within between R2 bd p
alcicornis 10 0.63 0.68 0.59 0.04 0.97 0.85
annularis 4 0.58 0.58 NaN NA NA NA
astreoides 5 0.06 0.06 NaN NA NA NA
cavernosa 7 0.01 0.02 0.01 0.16 0.39 0.67
cylindrus 8 0.13 0.14 0.13 0.14 0.54 0.62
fragum 8 0.27 0.33 0.22 0.09 0.48 0.79
furcata 9 0.71 0.56 0.84 0.36 0.81 0.05
radians 10 0.10 0.10 0.10 0.12 0.85 0.37
siderea 10 0.52 0.53 0.51 0.09 0.57 0.50
strigosa 9 0.81 0.76 0.86 0.23 0.23 0.08
knitr::kable(shorestats100, row.names=F, 
             caption="100% OTU data: differences within and between shore for each species")
100% OTU data: differences within and between shore for each species
Species n overall within between R2 bd p
alcicornis 10 0.64 0.65 0.64 0.10 0.57 0.54
annularis 4 0.73 0.73 NaN NA NA NA
astreoides 5 0.41 0.41 NaN NA NA NA
cavernosa 7 0.45 0.45 0.46 0.19 0.95 0.26
cylindrus 8 0.43 0.38 0.48 0.21 0.03 0.07
fragum 8 0.55 0.57 0.53 0.13 0.30 0.55
furcata 9 0.70 0.55 0.81 0.45 0.39 0.02
radians 10 0.40 0.40 0.39 0.07 0.50 0.93
siderea 10 0.67 0.68 0.66 0.10 0.45 0.48
strigosa 9 0.81 0.76 0.85 0.22 0.04 0.11

Beta diversity

Beta diversity is evaluated here as the multivariate dispersion of samples within a coral species group. Principal coordinate analysis on Bray-Curtis dissimilarities is used to calculate average distance-to-centroid values for each species group, which are then compared statistically by a permutation test. This analysis is implemented using betadisper() in the vegan package, based on Anderson (2006).

betad <- function(phy) {
  # Calculate distances to species centroids for each sample in principal coordinate space
  sambd <- betadisper(d=vegdist(t(otu_table(phy)), method="bray"), 
                      group=data.frame(sample_data(phy))$Species, type="centroid",
                      bias.adjust=TRUE)
  # Calculate each species' mean distance to centroid and standard error
  sambdsumm <- aggregate(data.frame(mean=sambd$distances), by=list(group=sambd$group), FUN=mean)
  sambdsumm$se <- aggregate(sambd$distances, by=list(group=sambd$group), 
                            FUN=function(x) sd(x)/sqrt(length(x)))$x
  # Determine order of decreasing mean dispersion
  sambdsumm.ord <- sambdsumm[order(sambdsumm$mean, decreasing=T), ]
  # Update betadisper object with species in decreasing order of mean dispersion
  sambd <- betadisper(d=vegdist(t(otu_table(phy)), method="bray"), 
                      group=factor(data.frame(sample_data(phy))$Species,
                                   levels=as.character(sambdsumm.ord$group)), 
                      type="centroid", bias.adjust=TRUE)
  # Use permutations to perform pairwise comparisons of group mean dispersions
  sampt <- permutest(sambd, pairwise=T)
  saml <- multcompLetters(sampt$pairwise$permuted)
  # Return results as a list
  return(list(sambdsumm.ord=sambdsumm.ord, sambdsumm=sambdsumm, saml=saml))
}

betad97 <- betad(phy97.f.t)
betad97bs <- betad(phy97bs.f.t)
betad100 <- betad(phy100.f.t)

# Figure: Distance to centroids
par(mfrow=c(1,3), mar=c(3.5,4,2,1), lwd=1)
with(betad97$sambdsumm.ord, {
  plot(mean, type="n", main="97% OTUs",
       ylim=c(0, 0.75), ylab="Distance to centroid", xaxt="n", xlab="")
  arrows(1:10, mean - se, 1:10, mean + se, length=0.05, angle=90, code=3)
  points(1:10, mean, cex=1, pch=21, bg="white")
  text(1:10, par("usr")[3]-0.025, srt=45, adj=1, xpd=T, cex=0.75, 
       labels=levels(SAM$Species)[order(betad97$sambdsumm$mean, decreasing=T)])
  text(1:10, mean + se + 0.03, labels=betad97$saml$Letters[as.character(group)], cex=0.5)
})
with(betad97bs$sambdsumm.ord, {
  plot(mean, type="n", main="97% within-sample OTUs",
       ylim=c(0, 0.75), ylab="Distance to centroid", xaxt="n", xlab="")
  arrows(1:10, mean - se, 1:10, mean + se, length=0.05, angle=90, code=3)
  points(1:10, mean, cex=1, pch=21, bg="white")
  text(1:10, par("usr")[3]-0.025, srt=45, adj=1, xpd=T, cex=0.75, 
       labels=levels(SAM$Species)[order(betad97bs$sambdsumm$mean, decreasing=T)])
  text(1:10, mean + se + 0.03, labels=betad97bs$saml$Letters[as.character(group)], cex=0.5)
})
with(betad100$sambdsumm.ord, {
  plot(mean, type="n", main="100% OTUs",
       ylim=c(0, 0.75), ylab="Distance to centroid", xaxt="n", xlab="")
  arrows(1:10, mean - se, 1:10, mean + se, length=0.05, angle=90, code=3)
  points(1:10, mean, cex=1, pch=21, bg="white")
  text(1:10, par("usr")[3]-0.025, srt=45, adj=1, xpd=T, cex=0.75, 
       labels=levels(SAM$Species)[order(betad100$sambdsumm$mean, decreasing=T)])
  text(1:10, mean + se + 0.03, labels=betad100$saml$Letters[as.character(group)], cex=0.5)
})

otubarplot <- function(phy, main) {
  samdat <- data.frame(sample_data(phy))
  typerelabund <- as.matrix(otu_table(phy)[order(data.frame(tax_table(phy))$Subtype), 
                                                 rownames(samdat)])
  typerelabund <- typerelabund[, rownames(sample_data(phy)[with(sample_data(phy), 
                                                                order(Shore, Site, InputFileName))])]
  # Get info for plotting OTU names and blast hits on top of barplot
  blasthits <- as.character(data.frame(tax_table(phy))[order(data.frame(tax_table(phy))$Subtype), "Subtype"])
  names <- as.character(rownames(data.frame(tax_table(phy)))[order(data.frame(tax_table(phy))$Subtype)])
  samples <- colnames(typerelabund)
  shores <- data.frame(sample_data(phy))[samples, "Shore"]
  sites <- data.frame(sample_data(phy))[samples, "Site"]
  divides <- apply(typerelabund, 2, cumsum)
  heights <- diff(divides)
  heights[heights < 0.04] <- NA
  # Plot barplot and OTU names and blast hits
  bars <- barplot(typerelabund, col=rainbow(ntaxa(phy)), las=1, space=0.1, xaxs="i",
                  xlab="", ylab="", xaxt="n")
  text(bars, -0.06, labels=samples, xpd=T)
  text(bars, -0.10, labels=shores, xpd=T)
  text(bars, -0.14, labels=substr(sites, 1, 2), xpd=T)
  text(bars[1]/4, c(-0.06, -0.10, -0.14), labels=c("sample:", "shore:", "site:"), xpd=T, pos=2)
  mtext(side=2, text = "Relative Abundance", line=2)
  for (i in 1:length(bars)) {
    text(rep(bars[i], length(heights[which(!is.na(heights[,i])),i])), 
         divides[which(!is.na(heights[,i])),i] + heights[which(!is.na(heights[,i])),i] / 2, 
         labels=paste(names[which(!is.na(heights[,i]))+1],blasthits[which(!is.na(heights[,i]))+1],sep="\n"),
         cex=0.75)
  }
  mtext(side=3, main, xpd=T, adj=0, line=0.5, font=2)
}

Individual coral species

Diploria strigosa

# Create subsetted phyloseq objects for Diploria strigosa
dstr97.f.p <- subset_samples(phy97.f.p, Species=="strigosa")
dstr97bs.f.p <- subset_samples(phy97bs.f.p, Species=="strigosa")
dstr100.f.p <- subset_samples(phy100.f.p, Species=="strigosa")
# Plot custom barplots for Diploria strigosa
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(dstr97.f.p, main="97% OTUs")
otubarplot(dstr97bs.f.p, main="97% within-sample OTUs")
otubarplot(dstr100.f.p, main="100% OTUs")

Porites furcata

# Create subsetted phyloseq objects for Porites furcata
pfur97.f.p <- subset_samples(phy97.f.p, Species=="furcata")
pfur97bs.f.p <- subset_samples(phy97bs.f.p, Species=="furcata")
pfur100.f.p <- subset_samples(phy100.f.p, Species=="furcata")
# Plot custom barplots for Porites furcata
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(pfur97.f.p, main="97% OTUs")
otubarplot(pfur97bs.f.p, main="97% within-sample OTUs")
otubarplot(pfur100.f.p, main="100% OTUs")

Orbicella annularis

# Create subsetted phyloseq objects for Orbicella annularis
oann97.f.p <- subset_samples(phy97.f.p, Species=="annularis")
oann97bs.f.p <- subset_samples(phy97bs.f.p, Species=="annularis")
oann100.f.p <- subset_samples(phy100.f.p, Species=="annularis")
# Plot custom barplots for Orbicella annularis
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(oann97.f.p, main="97% OTUs")
otubarplot(oann97bs.f.p, main="97% within-sample OTUs")
otubarplot(oann100.f.p, main="100% OTUs")

Millepora alcicornis

# Create subsetted phyloseq objects for Millepora alcicornis
malc97.f.p <- subset_samples(phy97.f.p, Species=="alcicornis")
malc97bs.f.p <- subset_samples(phy97bs.f.p, Species=="alcicornis")
malc100.f.p <- subset_samples(phy100.f.p, Species=="alcicornis")
# Plot custom barplots for Millepora alcicornis
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(malc97.f.p, main="97% OTUs")
otubarplot(malc97bs.f.p, main="97% within-sample OTUs")
otubarplot(malc100.f.p, main="100% OTUs")

Siderastrea siderea

# Create subsetted phyloseq objects for Siderastrea siderea
ssid97.f.p <- subset_samples(phy97.f.p, Species=="siderea")
ssid97bs.f.p <- subset_samples(phy97bs.f.p, Species=="siderea")
ssid100.f.p <- subset_samples(phy100.f.p, Species=="siderea")
# Plot custom barplots for Siderastrea siderea
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(ssid97.f.p, main="97% OTUs")
otubarplot(ssid97bs.f.p, main="97% within-sample OTUs")
otubarplot(ssid100.f.p, main="100% OTUs")

Favia fragum

# Create subsetted phyloseq objects for Favia fragum
ffra97.f.p <- subset_samples(phy97.f.p, Species=="fragum")
ffra97bs.f.p <- subset_samples(phy97bs.f.p, Species=="fragum")
ffra100.f.p <- subset_samples(phy100.f.p, Species=="fragum")
# Plot custom barplots for Favia fragum
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(ffra97.f.p, main="97% OTUs")
otubarplot(ffra97bs.f.p, main="97% within-sample OTUs")
otubarplot(ffra100.f.p, main="100% OTUs")

Siderastrea radians

# Create subsetted phyloseq objects for Siderastrea radians
srad97.f.p <- subset_samples(phy97.f.p, Species=="radians")
srad97bs.f.p <- subset_samples(phy97bs.f.p, Species=="radians")
srad100.f.p <- subset_samples(phy100.f.p, Species=="radians")
# Plot custom barplots for Siderastrea radians
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(srad97.f.p, main="97% OTUs")
otubarplot(srad97bs.f.p, main="97% within-sample OTUs")
otubarplot(srad100.f.p, main="100% OTUs")

Porites astreoides

# Create subsetted phyloseq objects for Porites astreoides
past97.f.p <- subset_samples(phy97.f.p, Species=="astreoides")
past97bs.f.p <- subset_samples(phy97bs.f.p, Species=="astreoides")
past100.f.p <- subset_samples(phy100.f.p, Species=="astreoides")
# Plot custom barplots for Porites astreoides
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(past97.f.p, main="97% OTUs")
otubarplot(past97bs.f.p, main="97% within-sample OTUs")
otubarplot(past100.f.p, main="100% OTUs")

Dendrogyra cylindrus

# Create subsetted phyloseq objects for Dendrogyra cylindrus
dcyl97.f.p <- subset_samples(phy97.f.p, Species=="cylindrus")
dcyl97bs.f.p <- subset_samples(phy97bs.f.p, Species=="cylindrus")
dcyl100.f.p <- subset_samples(phy100.f.p, Species=="cylindrus")
# Plot custom barplots for Dendrogyra cylindrus
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(dcyl97.f.p, main="97% OTUs")
otubarplot(dcyl97bs.f.p, main="97% within-sample OTUs")
otubarplot(dcyl100.f.p, main="100% OTUs")

Montastraea cavernosa

# Create subsetted phyloseq objects for Montastraea cavernosa
mcav97.f.p <- subset_samples(phy97.f.p, Species=="cavernosa")
mcav97bs.f.p <- subset_samples(phy97bs.f.p, Species=="cavernosa")
mcav100.f.p <- subset_samples(phy100.f.p, Species=="cavernosa")
# Plot custom barplots for Montastraea cavernosa
par(mfrow=c(3,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(mcav97.f.p, main="97% OTUs")
otubarplot(mcav97bs.f.p, main="97% within-sample OTUs")
otubarplot(mcav100.f.p, main="100% OTUs")

Network analysis from tutorial

make.net <- function(physeq, n) {
  # Remove taxa that are not present in subset
  physeq <- prune_taxa(rowSums(otu_table(physeq))!=0, physeq)
  # Convert OTU table to "edges" table (weight = relative abundance)
  otudf <- data.frame(t(otu_table(physeq)), check.names=F)
  edges <- setNames(melt(as.matrix(otudf)), nm=c("source", "target", "weight"))  # Melt data
  edges <- edges[edges$weight>n, ]
  # Create "nodes" table from sample and tax data
  sdf <- data.frame(sample_data(physeq))
  sdf$id <- rownames(sdf)
  tdf <- data.frame(tax_table(physeq))
  tdf$id <- rownames(tdf)
  nodes <- merge(sdf, tdf, all=T)
  # Create network
  net <- graph_from_data_frame(d=edges, vertices=nodes, directed=F)
  return(net)
}
dstr.net <- make.net(dstr97bs.f.p, 0)

set.seed(5453890)
plot(dstr.net,
     edge.curved=0.1, 
     edge.width=10*(E(dstr.net)$weight)^0.2,
     vertex.label=substr(V(dstr.net)$Subtype, 1, 4),
     vertex.color=taxcolors[factor(V(dstr.net)$Clade, levels=c("A","B","C","D","F","G"))])

pfur.net <- make.net(pfur97bs.f.p, 0)
plot(pfur.net,
     edge.curved=0.1, 
     edge.width=10*(E(pfur.net)$weight)^0.2,
     vertex.label=substr(V(pfur.net)$Subtype, 1, 4),
     vertex.color=taxcolors[factor(V(pfur.net)$Clade, levels=c("A","B","C","D","F","G"))])

oann.net <- make.net(oann97bs.f.p, 0)
plot(oann.net,
     edge.curved=0.1, 
     edge.width=10*(E(oann.net)$weight)^0.2,
     vertex.label=substr(V(oann.net)$Subtype, 1, 4),
     vertex.color=taxcolors[factor(V(oann.net)$Clade, levels=c("A","B","C","D","F","G"))])

malc.net <- make.net(malc97bs.f.p, 0)
plot(malc.net,
     edge.curved=0.1, 
     edge.width=10*(E(malc.net)$weight)^0.2,
     vertex.label=substr(V(malc.net)$Subtype, 1, 4),
     vertex.color=taxcolors[factor(V(malc.net)$Clade, levels=c("A","B","C","D","F","G"))])

set.seed(9147)
ssid.net <- make.net(ssid97bs.f.p, 0)
plot(ssid.net,
     edge.curved=0.1, 
     edge.width=10*(E(ssid.net)$weight)^0.2,
     vertex.label=substr(V(ssid.net)$Subtype, 1, 4),
     vertex.color=taxcolors[factor(V(ssid.net)$Clade, levels=c("A","B","C","D","F","G"))])

ffra.net <- make.net(ffra97bs.f.p, 0)
plot(ffra.net,
     edge.curved=0.1, 
     edge.width=10*(E(ffra.net)$weight)^0.2,
     vertex.label=substr(V(ffra.net)$Subtype, 1, 4),
     vertex.color=taxcolors[factor(V(ffra.net)$Clade, levels=c("A","B","C","D","F","G"))])

srad.net <- make.net(srad97bs.f.p, 0)
plot(srad.net,
     edge.curved=0.1, 
     edge.width=10*(E(srad.net)$weight)^0.2,
     vertex.label=substr(V(srad.net)$Subtype, 1, 4),
     vertex.color=taxcolors[factor(V(srad.net)$Clade, levels=c("A","B","C","D","F","G"))])

past.net <- make.net(past97bs.f.p, 0)
plot(past.net,
     edge.curved=0.1, 
     edge.width=10*(E(past.net)$weight)^0.2,
     vertex.label=substr(V(past.net)$Subtype, 1, 4),
     vertex.color=taxcolors[factor(V(past.net)$Clade, levels=c("A","B","C","D","F","G"))])

dcyl.net <- make.net(dcyl97bs.f.p, 0)
plot(dcyl.net,
     edge.curved=0.1, 
     edge.width=10*(E(dcyl.net)$weight)^0.2,
     vertex.label=substr(V(dcyl.net)$Subtype, 1, 4),
     vertex.color=taxcolors[factor(V(dcyl.net)$Clade, levels=c("A","B","C","D","F","G"))])

mcav.net <- make.net(mcav97bs.f.p, 0)
plot(mcav.net,
     edge.curved=0.1, 
     edge.width=10*(E(mcav.net)$weight)^0.2,
     vertex.label=substr(V(mcav.net)$Subtype, 1, 4),
     vertex.color=taxcolors[factor(V(mcav.net)$Clade, levels=c("A","B","C","D","F","G"))])

# Compute node degrees (#links) and use that to set node size:
deg <- degree(mcav.net, mode="all")
V(mcav.net)$size <- 20

plot(mcav.net, layout=layout_with_fr(mcav.net))

# Get network with one node for each species.
# Edge weight is the mean relative abundance of OTU.
# Convert OTU table to "edges" table (weight = relative abundance)
  otudf <- data.frame(t(otu_table(phy97bs.f.p)), check.names=F)
  # Average relative abundance of OTU within species
  #sp.ag <- aggregate(otudf, by=list(Species=data.frame(sample_data(phy97.f.p))$Species), FUN=mean)
  # Percent of samples of species that contain OTU at more than 1% relative abundance
  sp.ag <- aggregate(otudf, by=list(Species=data.frame(sample_data(phy97bs.f.p))$Species), 
                     FUN=function(x) length(x[x>0.01])/length(x))

  rownames(sp.ag) <- sp.ag$Species
  sp.ag <- sp.ag[, -1]
  edges <- setNames(melt(as.matrix(sp.ag)), nm=c("source", "target", "weight"))  # Melt data
  edges <- droplevels(edges[edges$weight>0, ])
  # Create "nodes" table from sample and tax data
  sdf <- data.frame(id=levels(data.frame(sample_data(phy97bs.f.p))$Species))
  tdf <- data.frame(tax_table(phy97bs.f.p))[rownames(data.frame(tax_table(phy97bs.f.p))) %in% edges$target, ]
  tdf$id <- rownames(tdf)
  nodes <- merge(sdf, tdf, all=T)
  nodes$type <- ifelse(is.na(nodes$Clade), 1, 2)
  # Create network
  net <- graph_from_data_frame(d=edges, vertices=nodes, directed=F)

  # Best one for now ---------
  V(net)$shape <- c("square", "circle")[factor(V(net)$type)]
  V(net)$size <- ifelse(is.na(V(net)$Clade), 15, 10*sqrt(degree(net))) #5*sqrt(degree(net))
  V(net)$label <- ifelse(is.na(V(net)$Clade), names(V(net)),  
                         unlist(lapply(strsplit(V(net)$Subtype, split="_"), "[", 1)))
  V(net)$label <- str_replace_all(V(net)$label, c("alcicornis"="M.alc.", "annularis"="O.ann",
                                                  "astreoides"="P.ast.", "cavernosa"="M.cav.",
                                                  "cylindrus"="D.cyl.", "fragum"="F.fra.", 
                                                  "furcata"="P.fur.", "radians"="S.rad.", 
                                                  "siderea"="S.sid.", "strigosa"="D.str."))
  V(net)$color <- ifelse(is.na(V(net)$Clade), "white",
                         taxcolors[factor(V(net)$Clade, levels=c("A","B","C","D","F","G"))])
  E(net)$color <- "gray60"
  E(net)$width <- 15 * E(net)$weight
  set.seed(12347)
  set.seed(12364)
  set.seed(12374)
  l <- layout.fruchterman.reingold(net)
  l <- norm_coords(l, ymin=-1, ymax=1, xmin=-1, xmax=1)
  par(mar=c(0,0,0,0))
  plot(net, rescale=F, layout=l*1, edge.curved=0.1, vertex.label.cex=V(net)$size/20,
       vertex.label.color="black")

  #legend(x=-1.05, y=1.1, c("CladeA", "CladeB", "CladeC", "CladeD", "CladeF", "CladeG"), pch=21,
  #     col="#777777", pt.bg=taxcolors, pt.cex=1.1, cex=.6, bty="n", ncol=2, text.width=0.2)